---
title: "04 Matching -- Perinatal"
author: "Ye Shen, Radhika Tampi, Raj Vatsa"
output: pdf_document
---


# Setup
```{r, message = FALSE, warning = FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(gt)
library(gtsummary)
library(tidyverse)
library(survey)
library(gridExtra)
library(grid)
library(foreign)
library(webshot)
library(MatchIt)
library(stringr)
```


```{r}
perinatal<-readRDS("data/cleaned_perinatal_did.rds")
```

## Coarsened Exact Matching: by quintile of wealth score [eval = F]
```{r matching}
## Match households by wealth score quintile: 

# 1. Matching Procedure: 
  # a) Only match on pre-treatment, so match households in 2014  based on wealth quintiles 
  # b) 1:1 match intervention to control

  # Coarsened Exact Matching & explanation of options:
  # k2k: If TRUE nearest neighbor matching without replacement will take place within each stratum, and any unmatched units will be dropped (e.g., if there are more treated than control units in the stratum, the treated units without a match will be dropped)
  # k2k.method: how the distance between units should be calculated if k2k = TRUE

df <- perinatal %>% 
  filter(year == 0) %>%
  matchit(formula = group ~ menage_score_bien_etre,
          method = "cem", cutpoints = list(menage_score_bien_etre = 5), 
          k2k = TRUE, k2k.method = "euclidean") 
summary(df)
df <- match.data(df)
  #euclidean ok because we are only matching on 1 var, no need to "weight" vars based on effect on dependent var
  #use 5 bins for quintiles (generated automatcially by MatchIt)

# 2. Add in data from matched households from 2016 to create total women antenatal dataset
women_match_perinatal <- perinatal %>%
  filter(year == 1) %>%
  subset(ID_menage %in% df$ID_menage) %>%
  mutate(subclass = df$subclass[match(ID_menage, df$ID_menage)]) %>%
  mutate(weights = 1) %>% ## change if we change the k2k option in MatchIt
  rbind(df) %>%
  arrange(subclass, year)

rm(df) # remove original matched dataframe from R

saveRDS(women_match_perinatal, "data/match_women_perinatal.rds")
```

## After matching, assign survey design to matched dataset
```{r}
svy_perinatal_match <- svydesign(
    id = ~cluster, strata = ~group, weights = ~wmweight,
    data = women_match_perinatal
  )
# Create subsets for each year
  svy_perinatal_match_2014 <- subset(svy_perinatal_match, year == 0)
  svy_perinatal_match_2016 <- subset(svy_perinatal_match, year == 1)
```

### DiD regression adjusting for wealth index score

 * Outcome: Delivery at PHF
```{r}
#Adjusted DID regression on PHF delivery
  did_perinatal_match <- svyglm(durmat.publichealthcenter.delivery~year*group+menage_score_bien_etre, design = svy_perinatal_match, family = gaussian)
```

## Table 3 for perinatal women
 * Outcome: Delivery at PHF
 
### Prepare Table 3
 

```{r}
table3_perinatal <- 
  as.data.frame(array(data = NA, dim = c(2, 19)))
# "Indicator","Study group", "Denominator", "Numerator", "Percent (SE)", "Difference 
# (p-value)","Denominator", "Numerator", "Percent (SE)", "Difference (p-value)","Percent 
# relative change", "Percent absolute change", "Difference in differences (p-value)"
colnames(table3_perinatal) <- 
  c("indicator", "group", "d_2014", "n_2014", "m_2014", "se_2014", "dif_2014", 
    "p-val_2014", "d_2016", "n_2016", "m_2016", "se_2016", "dif_2016", "p-val_2016",
    "trends_rel", "trends_abs",  "dif.in.dif(man)", "dif.in.dif(coef)", "p-val_did")

table3_perinatal$indicator <- "durmat.publichealthcenter.delivery"

# Delivery at PHF
## Summary stats for 2014
table3_perinatal[rev(c(1,2)), 2] <- c(0,1)
table3_perinatal[rev(c(1,2)), 3] <- 
    round(svytable( ~group, design = svy_perinatal_match_2014)) # 'd_2014'
table3_perinatal[rev(c(1,2)), 4] <- 
    round(coef(svyby(~durmat.publichealthcenter.delivery, by = ~group, design = svy_perinatal_match_2014, FUN = svytotal, keep.var = T, na.rm = T))) # 'n_2014'
table3_perinatal[rev(c(1,2)), 5] <- 
    round(coef(svyby(~durmat.publichealthcenter.delivery, by = ~group, design = svy_perinatal_match_2014, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'm_2014'
table3_perinatal[rev(c(1,2)), 6] <- 
    round(SE(svyby(~durmat.publichealthcenter.delivery, by = ~group, design = svy_perinatal_match_2014, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2014'
table3_perinatal[rev(c(1,2)), 7] <- table3_perinatal[1, 5] - table3_perinatal[2, 5]# 'dif_2014'

### Estimate p-value of the 2014 difference with a Chi2 test
  if (table3_perinatal[1, 4] == 0 & 
      table3_perinatal[2, 4] == 0) {
    table3_perinatal[2, 8] <- NA
  } else {
    table3_perinatal[2, 8] <- 
      round(svychisq(~durmat.publichealthcenter.delivery+group, design = svy_perinatal_match_2014)$p.value,2)
  }

## Summary stats for 2016
table3_perinatal[rev(c(1,2)), 9] <- 
    round(svytable( ~group, design = svy_perinatal_match_2016)) # 'd_2016'
table3_perinatal[rev(c(1,2)), 10] <- 
    round(coef(svyby(~durmat.publichealthcenter.delivery, by = ~group, design = svy_perinatal_match_2016, FUN = svytotal,keep.var = T, na.rm = T))) # 'n_2016'
table3_perinatal[rev(c(1,2)), 11] <- 
    round(coef(svyby(~durmat.publichealthcenter.delivery, by = ~group, design = svy_perinatal_match_2016, FUN = svymean, keep.var = T, na.rm = T))*100,2) # 'm_2016'
table3_perinatal[rev(c(1,2)), 12] <- 
    round(SE(svyby(~durmat.publichealthcenter.delivery, by = ~group, design = svy_perinatal_match_2016, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2016'
table3_perinatal[rev(c(1,2)), 13] <- table3_perinatal[1, 11] - table3_perinatal[2, 11]# 'dif_2016'

### Estimate p-value of the 2016 difference with a Chi2 test
  if (table3_perinatal[1, 10] == 0 & 
      table3_perinatal[2, 10] == 0) {
    table3_perinatal[2, 14] <- NA
  } else {
    table3_perinatal[2, 14] <- 
      round(svychisq(~durmat.publichealthcenter.delivery+group, design = svy_perinatal_match_2016)$p.value,2)
  }

## Estimate values for trends 2014-2016
  table3_perinatal[rev(c(1, 2)), 15] <- round((table3_perinatal[rev(c(1, 2)), 11] - 
       table3_perinatal[rev(c(1, 2)), 5]) / table3_perinatal[rev(c(1, 2)), 5]*100,2) #'trends_rel'
  table3_perinatal[rev(c(1, 2)), 16] <- table3_perinatal[rev(c(1, 2)), 11] - 
    table3_perinatal[rev(c(1, 2)), 5] #'trends_abs'
  table3_perinatal[2, 17] <- table3_perinatal[1, 16] - table3_perinatal[2, 16] #'dif.in.dif(man)'

## DiD stats 
  # 'dif.in.dif(coef)': Coefficient of Difference-in-Differences comparison 
  table3_perinatal[2, 18] <- round(summary(did_perinatal_match)$coefficients[5, 1]*100,2)
  # 'p-val_did': p-value of Difference-in-Differences comparison
  table3_perinatal[2, 19] <- round(summary(did_perinatal_match)$coefficients[5, 4],2)
```

### Final Table 3 Antenatal
```{r}
# Replicate final Table 2
final_table3_perinatal<-table3_perinatal
final_table3_perinatal$m_2014 <- sprintf("%.2f (%.2f)", final_table3_perinatal$m_2014, 
                                       final_table3_perinatal$se_2014)
final_table3_perinatal$m_2016 <- sprintf("%.2f (%.2f)", final_table3_perinatal$m_2016, 
                                       final_table3_perinatal$se_2016)
final_table3_perinatal$dif_2014 <- sprintf("%.2f (%.2f)", final_table3_perinatal$dif_2014, 
                                         final_table3_perinatal$`p-val_2014`)
final_table3_perinatal$dif_2016 <- sprintf("%.2f (%.2f)", final_table3_perinatal$dif_2016, 
                                         final_table3_perinatal$`p-val_2016`)
final_table3_perinatal$`dif.in.dif(coef)` <- sprintf("%.2f (%.2f)", 
                                                  final_table3_perinatal$`dif.in.dif(coef)`, 
                                                  final_table3_perinatal$`p-val_did`)
final_table3_perinatal$se_2014 <- NULL
final_table3_perinatal$se_2016 <- NULL
final_table3_perinatal$`p-val_2014` <- NULL
final_table3_perinatal$`p-val_2016` <- NULL
final_table3_perinatal$`p-val_did` <- NULL
final_table3_perinatal$`dif.in.dif(man)` <- NULL
final_table3_perinatal$dif_2014[grep("NA", final_table3_perinatal$dif_2014)] <- "--"
final_table3_perinatal$dif_2016[grep("NA", final_table3_perinatal$dif_2016)] <- "--"
final_table3_perinatal$`dif.in.dif(coef)`[
  grep("NA", final_table3_perinatal$`dif.in.dif(coef)`)] <- "--"

final_table3_perinatal <- as_tibble(final_table3_perinatal) %>% 
  select(-c(n_2014, n_2016, indicator)) %>%
  mutate(
    group = case_when(group == 1 ~ "IG", 
                      group == 0 ~ "NG"))
```

```{r}
gt(final_table3_perinatal) %>%
  tab_header(title = ("Table S3. Summary of responses and trends from the 2014 and 2016 IHOPE surveys for delivery at a Public Health Facility among pregnant women (15-49 years old)")) %>%
  tab_style(style = cell_text(align = "left"), location = cells_title("title")) %>%
  tab_spanner(
    label = "2014",
    columns = c(d_2014, m_2014, dif_2014)
  ) %>%
  tab_spanner(
    label = "2016",
    columns = c(d_2016, m_2016, dif_2016)
  ) %>%
  tab_spanner(
    label = "2014-2016 Trend",
    columns = c(trends_rel, trends_abs,`dif.in.dif(coef)`)
  ) %>%
  tab_style(style = cell_text(weight = "bold"), 
            location = cells_column_spanners(spanners = everything())) %>%
  cols_label(group = "Study group", d_2014 = "N", m_2014 = "Percent (SE)", 
               dif_2014 = " Difference (p-value)", d_2016 = "N", 
               m_2016 = "Percent (SE)", dif_2016 = " Difference (p-value)", 
               trends_rel = "Percent relative change", 
               trends_abs = "Percent absolute change", 
             `dif.in.dif(coef)` = "Adjusted DiD (p-value)") %>%
  tab_style(style = cell_text(weight = "bold"), 
              location = cells_column_labels(columns = everything())) %>%
  gt::gtsave("table_fig/paper/Supplemental/table_s3_perinatal_match.png")
```


## Figure 3: Perinatal
```{r}
# Figure:
# Generate figure based on table
library(stringr)
num_var <- 1

### Can RENAME to fig_outcome but then update wherever fig below ###
fig <- as.data.frame(matrix(nrow = num_var * 4, ncol = 6))

###  UPDATE TABLE NAME AS NEEDED ###
fig$V1 <- round(c(
  table3_perinatal$m_2014, table3_perinatal$m_2016), 2)
fig$V2 <- round(c(
  table3_perinatal$n_2014, table3_perinatal$n_2016), 0)
fig$V3 <- round(c(
  table3_perinatal$d_2014, table3_perinatal$d_2016), 0)
fig$V4 <- rep(c("Intervention", "Non-Intervention"), num_var * 2)
fig$V5 <- rep(c("2014 survey", "2016 survey"), each = num_var * 2)
fig$V6 <- rep("Perinatal Care-seeking", num_var * 4) ### RENAME BASED ON OUTCOME ###
colnames(fig) <- c("mean", "num", "dem", "group", "year", "indicator")
fig$group <- factor(fig$group, levels = c("Non-Intervention", "Intervention"))

figure <- 
ggplot(fig, 
         aes(x = mean, y = group, group = interaction(group, indicator), 
             label = sprintf("%s/%s", num, dem))) +
  geom_point(aes(colour = year), size = 3.5) +
  scale_colour_manual(breaks = fig$year, 
                      values = c("2014 survey" = "black", "2016 survey" = "red")) +
  geom_line() +
  labs(
    x = "Percentage", y = "",
    title = "Matching on Household Wealth Index" ### UPDATE TITLE BASED ON OUTCOME ###
  ) +
  theme_bw() +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)) +
  scale_y_discrete(position = "right") +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10), 
    panel.grid.minor = element_blank(), legend.title = element_blank(),
    title = element_text(size = 12, face = "bold"), 
    axis.title = element_text(size = 13),
    legend.text = element_text(size = 12)
  )+
  annotate("text", label="}", x=30, y=1.5, size =20, fontface="plain")+
  annotate("text", label=paste0("Adjusted DiD (p-val): \n",final_table3_perinatal$ `dif.in.dif(coef)`[2]), x=50, y=1.5)

saveRDS(figure,"table_fig/perinatal/fig_peri_match.rds")
ggsave("table_fig/perinatal/fig3perinatal_match.png")
```


# Balance Before and After Matching

```{r}
perinatal_notmatch <- readRDS("data/cleaned_perinatal_did.rds") %>%
    mutate(group_year=as.factor(case_when(
    group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
    group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
    group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
    group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
  ))) %>%
  mutate(match = 0) 

perinatal_match<-readRDS("data/match_women_perinatal.rds")%>%
  mutate(group_year=as.factor(case_when(
    group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
    group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
    group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
    group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
  ))) %>%
  mutate(match = 1) %>%
  select(-c(subclass, weights))

perinatal_full <- rbind(perinatal_match, perinatal_notmatch)
perinatal_full$match <- factor(perinatal_full$match, levels = c("0", "1"),
                  labels = c("Unmatched", "Matched")
                  )

perinatal_full %>%
  ggplot() +
  geom_density(aes(menage_score_bien_etre, fill = group_year), alpha = 0.4, adjust = 1.5) +
  labs(x = "Wealth Index Score", title = "Fig. S4 Balance Before and After Matching For Households with Postpartum Women", fill = "Treatment and Period") +
  scale_fill_discrete(labels=c("1"="Treatment Preperiod","2"="Treatment Postperiod","3"="Control Preperiod","4"="Control Postperiod")) +
  theme_bw()+
  theme(plot.title = element_text(size=8), 
        axis.line = element_line(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  facet_wrap(~match)
ggsave("table_fig/paper/Supplemental/fig_s4_matching_balance_perinatal.png", width=8, height=4, dpi=300)
```